Notes: Data: 2020-11-03_Switzerland_upto_2018-11-31, Switzerland_2018-12-01_2020-11-03. Pre-processing: after gathering the data, the two datasets were merged, and duplicates and empty values (especially missing dates) were removed.

PACKAGES

1. RAW DATA PROCESSING

#CLEANING PROCESS TO JOIN GATHERED DATA FROM FLICKR

#data <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/2020-11-03_Switzerland_upto_2018-11-31.csv", encoding = "UTF-8" )
#saveRDS(data, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")

#data2 <- read.csv("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Switzerland_2018-12-01_2020-11-03.csv", encoding = "UTF-8")
#saveRDS(data2, "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")

# export dataframes to backup the main tables
#d1 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Compile_2018-11-03.rds")
#d2 <- readRDS("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Raw Data/Update_2020-11-03.rds")

#Join two dataset in a single main dataframe
#raw <-  dplyr::bind_rows(d1, d2)

#raw$tags <- gsub(";", "  ", raw$tags) # Remove symbol in column tags
#raw$post_title <- gsub(";", "  ", raw$post_title) # Remove symbol in column post_title
#raw$post_body <- gsub(";", "  ", raw$post_body) # Remove symbol in column post_body

#raw_clean <- subset(raw, select= c("post_guid", "longitude", "latitude", "user_guid", "post_create_date", "post_publish_date", "tags", "post_title", "post_body", "place_guid"))

#raw_clean <- distinct(raw_clean, post_guid, .keep_all = TRUE)
#raw_clean <- filter(raw_clean, longitude >= 0.0001) 

#Export the total table for future applications
#write.table(raw_clean,"~/GitHub/Flickr_SwissParks/Join2018-2020.csv", sep=";", dec=".")

1.1 FUNCTIONS

# define root folder for data
# (the trailing slash is required: loaddata() builds paths with paste0())
root_folder <- "~/GitHub/Flickr_SwissParks/"    # local folder
# NOTE: the original script had `setwd<- '~/GitHub/Flickr_SwissParks/'` here.
# That is an assignment, not a call: it only created a character variable
# named `setwd` and never changed the working directory. Every path in this
# script is absolute or built from root_folder, so no working-directory
# change is needed and the stray variable is dropped.

# Short EPSG identifiers: used to declare which CRS a dataset is already in
# (assigning a CRS, not reprojecting)
crs_wgs84 <- "+init=epsg:4326"  # lat/lng
crs_osm <- "+init=epsg:3857"    # OSM projection
crs_bng <- "+init=epsg:27700"   # British National Grid, BNG
crs_sng <- "+init=epsg:2056"    # Swiss National, CH1903+ / LV95

# Full PROJ definitions: used as the target when reprojecting
proj_wgs84 <- "+proj=longlat +datum=WGS84"
proj_osm <- "+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0.0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs"
proj_bng <- "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +datum=OSGB36 +units=m +no_defs"
proj_sng <- "+proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs"

# penalty against scientific notation when numbers are printed
# (larger values keep fixed notation for longer)
options(scipen = 7)

# chart theme: black-and-white base, no border around the panel,
# dark grey axis text and grey axis ticks
t <- theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.text = element_text(size = 10, colour = "#444444"),
    axis.ticks = element_line(colour = "gray")
  )

# reproject from lat/lng (WGS84) to another CRS
# ---------------------------------------------
# df       : data frame holding WGS84 coordinates in columns named 'lng' and
#            'lat'. By convention these are the first two columns; every
#            column from the third onwards is carried along as attribute data,
#            so at least one such column is needed.
# proj_crs : PROJ string of the target CRS (e.g. proj_sng)
# returns  : df with two columns appended, 'crs_x' and 'crs_y', holding the
#            reprojected coordinates in the original row order
reproject <- function(df, proj_crs) {

  stopifnot(
    is.data.frame(df),
    all(c("lng", "lat") %in% names(df)),
    ncol(df) >= 3
  )

  # select coordinates by name to avoid mixing x and y up
  dataset_map_coords <- df[c("lng", "lat")]
  # drop = FALSE keeps a data frame even when only one attribute column is left
  dataset_map_data <- df[, 3:ncol(df), drop = FALSE]
  dataset_map <- SpatialPointsDataFrame(coords = dataset_map_coords, data = dataset_map_data)

  # declare the CRS the coordinates are currently in, then reproject
  proj4string(dataset_map) <- CRS(crs_wgs84)
  dataset_map <- spTransform(dataset_map, CRS(proj_crs))

  # read the reprojected coordinates straight from the spatial object
  # (column 1 = x from 'lng', column 2 = y from 'lat') and append them
  xy <- coordinates(dataset_map)
  newdf <- data.frame(crs_x = unname(xy[, 1]), crs_y = unname(xy[, 2]))
  cbind(df, newdf)
}


# load a data file from root_folder and standardise the coordinate column
# names expected by reproject()
# ---------------------------------------
# input_file : file name, appended directly to root_folder
# returns    : data frame read from the ';'-separated file, with fields equal
#              to "0" read as NA and the columns 'latitude' / 'longitude'
#              (when present) renamed to 'lat' / 'lng'
loaddata <- function(input_file) {

  csv_path <- paste0(root_folder, input_file)
  loaded <- read.csv(csv_path, sep = ";", na.strings = "0")

  col_names <- names(loaded)
  col_names[col_names == "latitude"] <- "lat"
  col_names[col_names == "longitude"] <- "lng"
  names(loaded) <- col_names

  loaded
}

1.2 LOAD DATA

# Run the functions with the raw data
input_file <- "Join2018-2020.csv"
orig <- loaddata(input_file)

# make sure the coordinates are numeric whatever type read.csv inferred
orig$lat <- as.numeric(as.character(orig$lat))
orig$lng <- as.numeric(as.character(orig$lng))

# Flag and drop rows with a missing coordinate. Both lat and lng are checked:
# the original only looked at lat, so a row with a missing lng would pass
# through and later break the construction of the spatial points.
# The is_na column is kept because later steps select columns by position.
orig$is_na <- is.na(orig$lat) | is.na(orig$lng)
orig <- orig[!orig$is_na, ]

1.2.1 PREPARATION OF THE DATASET

# Drop unneeded columns and rows to slim the dataset
# -----------------------------------------

#### a. Only pictures taken from 2004-01-01 up to 2020-11-03 (both inclusive)
# Work on a copy, parse the creation timestamp to a Date and keep only the
# rows inside the study period
df <- orig
df$post_create <- as.Date(df$post_create_date)
reduced_04 <- filter(
  df,
  post_create >= "2004-01-01",  # lower bound of the study period
  post_create <= "2020-11-03"   # upper bound: date of the last data download
)
trimmed <- reduced_04
rm(df)

# add in reprojection to CH1903+ / LV95
# -------------------------------------
# slice off min. fields to acquire reprojection coords, then rejoin to dataset
# order matters: need lat and lng in first two fields + two other fields for data (any will do)
# NOTE(review): all selections below are positional. Columns 2 and 3 of
# `trimmed` are presumably lng and lat (reproject() itself picks them by name) - confirm.
df <- trimmed[, c(3, 2, 1, 4:10)]
proj_crs <- proj_sng  # reproject from WGS84 to Swiss nat.coords 
df <- reproject(df, proj_crs)
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Bessel 1841 ellipsoid in Proj4
## definition
# reproject() appends crs_x / crs_y after the 10 selected columns, hence 11:12
df <- df[, c(11:12)]  # just keep the new coords and then merge back into source
trimmed <- cbind(trimmed, df)
# keep column 1, columns 4-10 and the two reprojected coordinates; positions 13/14
# assume `trimmed` had exactly 12 columns before the cbind - TODO confirm
trimmed <- trimmed[, c(1, 4:10, 13,14)]
rm(df)

# rename the reprojected coordinate columns (crs_y / crs_x) to lat and lng;
# from here on lat/lng hold projected Swiss coordinates, not WGS84 degrees
names(trimmed)[names(trimmed) == "crs_y"] <- "lat"
names(trimmed)[names(trimmed) == "crs_x"] <- "lng"
df <-  trimmed

1.2.2 SNIP TO DATA WITHIN SWISS BORDERS

# build a SpatialPointsDataFrame from the projected points (coordinates picked
# by name, first eight columns kept as attributes), declare its CRS as the
# Swiss national grid and convert it to an sf object
df_map_coords <- df[c("lng", "lat")]
df_map_data <- df[, 1:8]
df_map <- SpatialPointsDataFrame(
  coords = df_map_coords,
  data = df_map_data
)
proj4string(df_map) <- CRS(proj_sng)
df_map_St <- st_as_sf(df_map)

# Load the shapefiles used below: park polygons here, then land cover and
# cantonal limits.
parks <- readOGR(
  dsn = "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Parks.shp",
  use_iconv = TRUE,
  encoding = "UTF-8"
)
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\Swiss_Parks.shp", layer: "Swiss_Parks"
## with 29 features
## It has 13 fields
## Integer64 fields read as strings:  OBJECTID Rechtsgrun
## Warning in readOGR(dsn = "//files.geo.uzh.ch/shared/group/geocomp/
## jort_franziska_daniela/GIS/Swiss_Parks.shp", : Z-dimension discarded
# bring the park polygons into the Swiss national grid used for the points
parks <- spTransform(parks, CRS(proj_sng))
# land cover polygons
landcover <- readOGR(
  dsn = "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/LandCover_Parks.shp",
  use_iconv = TRUE,
  encoding = "UTF-8"
)
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\LandCover_Parks.shp", layer: "LandCover_Parks"
## with 39308 features
## It has 5 fields
## Integer64 fields read as strings:  OBJECTID Id gridcode
# bring the land cover polygons into the Swiss national grid
landcover <- spTransform(landcover, CRS(proj_sng))
# cantonal limits
kanton <- readOGR(
  dsn = "//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/GIS/Swiss_Kanton.shp",
  use_iconv = TRUE,
  encoding = "UTF-8"
)
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS
## = dumpSRS, : Discarded datum CH1903+ in Proj4 definition: +proj=somerc
## +lat_0=46.9524055555556 +lon_0=7.43958333333333 +k_0=1 +x_0=2600000 +y_0=1200000
## +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +vunits=m
## +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "\\files.geo.uzh.ch\shared\group\geocomp\jort_franziska_daniela\GIS\Swiss_Kanton.shp", layer: "Swiss_Kanton"
## with 50 features
## It has 20 fields
## Integer64 fields read as strings:  ERSTELL_J REVISION_J HERKUNFT_J KANTONSNUM EINWOHNERZ
## Warning in readOGR("//files.geo.uzh.ch/shared/group/geocomp/
## jort_franziska_daniela/GIS/Swiss_Kanton.shp", : Z-dimension discarded
# bring the cantonal limits into the Swiss national grid
kanton <- spTransform(kanton, CRS(proj_sng))


# Convert the three sp layers to sf simple features (objects with a geometry
# column); the intersections below are run on these sf objects
parks_St <- st_as_sf(parks)
landcover_St <- st_as_sf(landcover)
kanton_St <- st_as_sf(kanton)

# Attach polygon attributes to the Flickr points through three successive
# intersections; each step overwrites Flickr_park with the narrower result.
# NOTE(review): the later selection `Flickr_park[c(39, 40, ...)]` is positional,
# so the order of these calls (and of their arguments) presumably determines
# which columns those indices hit - do not reorder without checking.
# Intersect points (Flickr images) with limits of parks
Flickr_park <- st_intersection(parks_St, df_map_St)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Intersect points (Flickr images) with land cover.
Flickr_park <- st_intersection(landcover_St, Flickr_park)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
# Include data of canton in the points data
Flickr_park <- st_intersection(kanton_St, Flickr_park)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Flickr_Parks_map.png")       

# Overview map: cantons as grey background, Flickr points in yellow and the
# park outlines in dark green drawn on top
ggplot() +
  geom_sf(data = kanton_St, fill = "grey", color = "white") +
  geom_sf(data = Flickr_park, fill = NA, color = "yellow", size = 0.005) +
  geom_sf(data = parks_St, fill = NA, color = "dark green", size = 1) +
  labs(title = "Flickr pictures in Swiss National Parks") +
  theme(plot.title = element_text(hjust = 0.5)) +
  coord_sf()

# free the intermediate objects that are no longer needed
rm(
  trimmed, df, t,
  df_map, df_map_data, df_map_coords, df_map_St,
  landcover, parks, kanton
)

1.2.2 DATA TABLE EXPORTATION

# keep only the columns needed for the analysis (selected by position in the
# intersected table)
flickr <- Flickr_park[c(39, 40, 41, 19, 23, 28, 38, 43, 44, 45, 47)]

# rename the kept columns to the short names used in the rest of the report
# (old name = new name; applied one after the other, in this order)
flickr_renames <- c(
  post_guid = "photo_id",
  user_guid = "USER",
  post_create_date = "DATE",
  gridcode = "USE",
  NAME = "CANTON",
  SHAPE_Area = "km2",
  tags = "TAGS",
  Name = "PARK",
  post_title = "TITLE",
  post_body = "BODY"
)
for (old_name in names(flickr_renames)) {
  names(flickr)[names(flickr) == old_name] <- flickr_renames[[old_name]]
}
rm(flickr_renames, old_name)

# pull the x / y coordinates out of the point geometries into plain columns,
# then drop the geometry so that flickr becomes an ordinary table
flickr <- flickr %>%
  mutate(
    lng = unlist(map(geometry, 1)),
    lat = unlist(map(geometry, 2))
  )
flickr <- st_drop_geometry(flickr)

# General statistics of the photo id column
summary(flickr$photo_id)
##    Length     Class      Mode 
##     77697 character character
# silence the grouping messages that dplyr::summarise() would otherwise print
# into the report
options(dplyr.summarise.inform = FALSE)

1.2.3 FILTERING THE LAND COVER

Only pictures that are in categories of land cover that are natural

# Subset of the pictures located in natural land cover: rows whose land cover
# code (USE) is 100, 120, 140 or 160 are excluded.
# `!=` means "not equal to"; a row is kept only if it passes all four tests.
flickr_nat <- filter(
  flickr,
  USE != 100 & USE != 120 & USE != 140 & USE != 160
)

# General statistics of the photo id column
summary(flickr_nat$photo_id)
##    Length     Class      Mode 
##     56805 character character

2. GENERAL ANALYSIS OF PARKS

# From here on the analysis uses only the natural land cover subset.
# Two working copies are made so that flickr_nat itself stays untouched.
db <- flickr_nat
ddbb <- flickr_nat

a. Number of visitors each Swiss National Parks, according the land cover,

NA values refer to users or land cover categories that do not exist in the park

# Number of distinct users per park and land cover class (all land cover
# classes, i.e. computed on `flickr`, not on the natural-only subset).
# Step 1 collapses the pictures to one row per (park, land cover, user);
# step 2 counts those rows, giving the number of users.
landcover <- flickr %>%
  group_by(PARK, USE, USER) %>%
  summarise(land_picture = n()) %>%   # pictures per user in a park / land cover
  group_by(PARK, USE) %>%
  summarise(park_landcover = n())     # users per park / land cover

# Reshape to one row per park and one column per land cover code
lu <- pivot_wider(landcover, names_from = USE, values_from = park_landcover)
landcover_park <- as.data.frame(lu)

# Replace the numeric land cover codes in the column names with their labels
landcover_labels <- c(
  # land cover categories that are excluded from the analysis
  "100" = "Building area",
  "120" = "Traffic and transportation surface",
  "140" = "Special settlement areas",
  "160" = "Recreation and green spaces",
  # land cover categories that are considered for the analysis
  "200" = "Fruit growing, viticulture, horticulture",
  "220" = "Arable and Forage Cultivation",
  "240" = "Alpine farming",
  "300" = "Forest",
  "400" = "Lakes and rivers",
  "420" = "Unproductives"
)
for (landcover_code in names(landcover_labels)) {
  names(landcover_park)[names(landcover_park) == landcover_code] <-
    landcover_labels[[landcover_code]]
}
rm(landcover_labels, landcover_code)

# Sort the table alphabetically by park name
landcover_park <- arrange(landcover_park, PARK)
#Exporting the table to csv in the root folder
#write.csv(landcover_park,"~/GitHub/Flickr_SwissParks/Results/LandCover.csv", row.names = FALSE)

# Display the table with a nicer format
landcover_park %>%
  kbl() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = TRUE,
    position = "left",
    latex_options = c("striped", "repeat_header")
  )
PARK Building area Traffic and transportation surface Special settlement areas Recreation and green spaces Arable and Forage Cultivation Alpine farming Forest Lakes and rivers Unproductives Fruit growing, viticulture, horticulture
Biosfera Val Müstair 53 28 1 6 69 79 77 23 64 NA
Jurapark Aargau 124 64 10 17 166 NA 102 43 1 25
Landschaftspark Binntal 15 10 1 2 52 14 42 17 39 NA
Naturpark Beverin 40 52 6 15 128 85 163 78 102 3
Naturpark Diemtigtal 7 8 NA NA 21 41 29 10 19 NA
Naturpark Gantrisch 66 48 3 9 195 81 139 89 26 9
Naturpark Pfyn-Finges 113 39 18 24 105 104 80 45 158 38
Naturpark Thal 29 22 1 1 57 50 87 2 3 6
Parc du Doubs 93 73 4 8 127 75 162 78 28 4
Parc Ela 151 90 16 29 277 158 307 84 162 9
Parc Jura vaudois 173 100 14 36 191 214 282 141 37 13
Parc naturel périurbain du Jorat NA 2 1 1 15 NA 38 3 NA NA
Parc naturel régional de la Vallée du Trient 58 45 42 4 67 81 223 61 220 15
Parc naturel régional Gruyère Pays-d’Enhaut 297 190 18 299 636 430 395 102 176 9
Parc régional Chasseral 118 80 13 20 155 172 152 5 5 31
Parco Val Calanca 7 4 2 NA 10 11 28 6 11 NA
Regionaler Naturpark Schaffhausen 22 16 3 3 57 1 39 10 1 12
Schweizerischer Nationalpark NA 1 NA NA NA 1 85 8 60 NA
UNESCO Biosphäre Entlebuch 25 9 1 4 75 74 68 10 34 1
Wildnispark Zürich Sihlwald 1 7 NA NA NA NA 50 5 NA NA
# drop the intermediate land cover tables
rm(lu, landcover)

b. Area (km2) of national parks

# Area per park. Some parks consist of several polygons, each with its own
# area (km2 column), so the distinct (park, polygon area) pairs are listed
# first and then added up per park. Only polygons that contain at least one
# picture appear in the data and therefore contribute to the sum.
area_portions <- ddbb %>%
  group_by(PARK, km2) %>%
  summarise(photos_km = n())

# Total area of the polygons of each park, rounded to two decimals
area_park <- area_portions %>%
  group_by(PARK) %>%
  summarise(km = round(sum(km2), 2))

# Total area (km2) covered by all parks
summarise(area_park, area = sum(km))
## # A tibble: 1 x 1
##    area
##   <dbl>
## 1 5848.
rm(area_portions)

c. Count of pictures in the parks

# Pictures per park, with each park's share of all pictures (percent, two
# decimals), sorted from the largest share to the smallest
photos <- ddbb %>%
  group_by(PARK) %>%
  summarise(photo = n()) %>%
  mutate(ratio_photos = round((photo / sum(photo)) * 100, 2)) %>%
  arrange(desc(ratio_photos))

# Total number of pictures in the analysed dataset
summarise(ddbb, count = n())
##   count
## 1 56805

d. Count of Users in the parks

# Users per park: collapse to one row per (park, user), count those rows per
# park, add each park's share of the total (percent, two decimals) and sort
# from the largest share to the smallest
users_flickr <- ddbb %>%
  group_by(PARK, USER) %>%
  summarise(user_photos = n()) %>%
  group_by(PARK) %>%
  summarise(user = n()) %>%
  mutate(ratio_users = round((user / sum(user)) * 100, 2)) %>%
  arrange(desc(ratio_users))

# Sum of the per-park user counts (a user active in several parks is counted
# once for each of them)
summarise(users_flickr, users = sum(user))
## # A tibble: 1 x 1
##   users
##   <int>
## 1  5743

e. Statistics of area (km2), users and photos for each Swiss National Park. NOTE: THESE VALUES ONLY CONSIDER THE NATURAL LAND COVER (the dismissed categories are: “Building area”, “Traffic and transportation surface”, “Special settlement areas”, “Recreation and green spaces”)

# Summary table per park: area, pictures, users and their densities.
# The three source tables are sorted differently (area_park by park name,
# photos and users_flickr by descending share), so they must be matched on
# PARK. Combining their columns by row position attaches the counts to the
# wrong parks.
park_stats <- area_park %>%
  inner_join(photos, by = "PARK") %>%
  inner_join(users_flickr, by = "PARK")

general <- data.frame(
  "Parks" = park_stats$PARK,
  "Area(km2)" = park_stats$km,
  "#Flickr" = park_stats$photo,
  "%Flickr" = park_stats$ratio_photos,
  "Flickr/km2" = round(park_stats$photo / park_stats$km, 2),
  "#Users" = park_stats$user,
  "%Users" = park_stats$ratio_users,
  "Users/km2" = round(park_stats$user / park_stats$km, 2),
  check.names = FALSE  # keep the '#', '%' and '/' characters in the headers
)
rm(park_stats)

# Sort the table alphabetically by park name
general <- general %>% arrange(Parks)

kbl(general) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Parks Area(km2) #Flickr %Flickr Flickr/km2 #Users %Users Users/km2
Biosfera Val Müstair 198.64 13340 23.48 67.16 1134 19.75 5.71
Jurapark Aargau 299.05 6201 10.92 20.74 642 11.18 2.15
Landschaftspark Binntal 164.78 6056 10.66 36.75 530 9.23 3.22
Naturpark Beverin 515.11 4324 7.61 8.39 453 7.89 0.88
Naturpark Diemtigtal 135.50 4250 7.48 31.37 362 6.30 2.67
Naturpark Gantrisch 405.68 3015 5.31 7.43 358 6.23 0.88
Naturpark Pfyn-Finges 276.51 2990 5.26 10.81 357 6.22 1.29
Naturpark Thal 139.39 2885 5.08 20.70 338 5.89 2.42
Parc du Doubs 293.65 2786 4.90 9.49 305 5.31 1.04
Parc Ela 657.82 1769 3.11 2.69 239 4.16 0.36
Parc Jura vaudois 530.62 1715 3.02 3.23 198 3.45 0.37
Parc naturel périurbain du Jorat 9.38 1529 2.69 163.01 172 2.99 18.34
Parc naturel régional de la Vallée du Trient 206.92 1317 2.32 6.36 129 2.25 0.62
Parc naturel régional Gruyère Pays-d’Enhaut 632.10 1290 2.27 2.04 120 2.09 0.19
Parc régional Chasseral 473.30 1140 2.01 2.41 107 1.86 0.23
Parco Val Calanca 120.49 759 1.34 6.30 86 1.50 0.71
Regionaler Naturpark Schaffhausen 213.03 587 1.03 2.76 71 1.24 0.33
Schweizerischer Nationalpark 170.33 364 0.64 2.14 51 0.89 0.30
UNESCO Biosphäre Entlebuch 394.49 326 0.57 0.83 46 0.80 0.12
Wildnispark Zürich Sihlwald 10.95 162 0.29 14.79 45 0.78 4.11
#write.csv(general,"~/GitHub/Flickr_SwissParks/Results/General.csv", row.names = FALSE)

3. TEMPORAL ANALYSIS OF PARKS (Using users’ visits)

# Number of pictures each user contributed in each park, followed by the
# distribution of those counts
contrib <- ddbb %>%
  group_by(PARK, USER) %>%
  summarise(photo = n())

summary(contrib$photo)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    1.000    1.000    2.000    9.891    6.000 1629.000
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Picture_Park_Taken.png")       

# Histogram, one panel per park, of the log of the number of pictures per
# user; bar heights are counts divided by the summed counts
ggplot(contrib, aes(x = log(photo))) +
  geom_histogram(
    aes(y = (..count..) / sum(..count..)),
    binwidth = .1,
    colour = "black",
    fill = "white"
  ) +
  facet_wrap(vars(PARK), labeller = label_wrap_gen(width = 24)) +
  theme_bw() +
  theme(
    axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(colour = "grey20", size = 6),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 10)
  ) +
  labs(title = 'Distribution of percentage of pictures taken by users in the Parks')

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Trend_Pictures.png")       

# Q-Q plot of the pictures-per-user counts against the same counts with
# Gaussian noise (mean 0, sd 50) added.
x <- contrib$photo
# Draw exactly one noise value per observation. The original drew a fixed
# 2500 values, which R recycled against the differently sized `x` (with a
# "longer object length is not a multiple of shorter object length" warning),
# so the noise was not independent across observations.
y <- x + rnorm(length(x), 0, 50)
qqplot(x, y)

3.1 YEARLY VISITOR IN THE PARKS

# Parse the picture date and derive the calendar year (as text)
ddbb$DATE <- as.Date(ddbb$DATE)
ddbb$YEAR <- format(ddbb$DATE, format = "%Y")

# Yearly visits: first one row per (park, year, user) ...
year_photo <- ddbb %>%
  group_by(PARK, YEAR, USER) %>%
  summarise(photo_yearly = n())

# ... then the number of such rows, i.e. distinct users, per park and year
yearly_counts <- year_photo %>%
  group_by(PARK, YEAR) %>%
  summarise(VISITS = n())

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/general_trend.png")       

# Line chart of yearly users, one panel per park (group = 1 joins the
# discrete years of a panel into a single line)
ggplot(data = yearly_counts, aes(x = YEAR, y = VISITS, group = 1)) +
  geom_line() +
  facet_wrap(vars(PARK), labeller = label_wrap_gen(width = 24)) +
  theme_bw() +
  theme(
    axis.text.x = element_text(colour = "grey20", size = 6, angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(colour = "grey20", size = 6),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 10)
  ) +
  labs(title = 'Annual distribution of users per Park')

rm(year_photo)

3.2 PARK USERS PER SEASON

Number of users per season in each Swiss National Parks

# dates of the pictures
dates <- ddbb$DATE
# abbreviated month label of each date, stored in a new column called month
ddbb$month <- month(dates, label = TRUE)

# Season of each picture, using the report's own grouping:
#   May-Jul = Summer, Aug-Oct = Autumn, Nov-Jan = Winter, Feb-Apr = Spring.
# The seasons are derived from the month *number* rather than the label:
# the original compared against the label 'Nove', which never matches, so
# every November picture fell through to "Spring". Numbers also keep the
# classification independent of the session locale.
ddbb$SEASON <- local({
  month_num <- month(dates)
  ifelse(month_num %in% c(5, 6, 7), "Summer",
         ifelse(month_num %in% c(8, 9, 10), "Autumn",
                ifelse(month_num %in% c(11, 12, 1), "Winter", "Spring")))
})
#Calculation of pictures per user in seasons

# Pictures per user in each park and season (one row per park/season/user)
season <- ddbb %>%
  group_by(PARK, SEASON, USER) %>%
  summarise(picture_season = n())

# Display settings: factor level order of the seasons, one fill colour per
# level (same order), and the column order of the wide table
f <- c("Summer", "Spring", "Winter", "Autumn")
color_list <- c("#ffbf00", "#00b04f", "#00b0f0", "#ed7c31")
col_order <- c("PARK", "Autumn", "Winter", "Spring", "Summer")

# Users per park and season: count the rows of the table above, then turn
# SEASON into an ordered factor and sort by it
season_users <- season %>%
  group_by(PARK, SEASON) %>%
  summarise(season_users = n()) %>%
  mutate(SEASON = factor(SEASON, levels = f)) %>%
  arrange(SEASON)

# Wide table: one row per park, one column per season, sorted by park name
season_park <- season_users %>%
  pivot_wider(names_from = SEASON, values_from = season_users)
season_park <- arrange(season_park[, col_order], PARK)

#write.csv(season_park,"//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Season_Park.csv", row.names = FALSE)

# Display the table with a nicer format
season_park %>%
  kbl() %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
PARK Autumn Winter Spring Summer
Biosfera Val Müstair 97 20 21 73
Jurapark Aargau 91 48 97 100
Landschaftspark Binntal 51 17 16 35
Naturpark Beverin 145 52 67 141
Naturpark Diemtigtal 31 16 22 20
Naturpark Gantrisch 145 102 138 124
Naturpark Pfyn-Finges 140 55 91 115
Naturpark Thal 53 28 49 43
Parc du Doubs 120 54 96 119
Parc Ela 269 122 158 215
Parc Jura vaudois 216 156 200 193
Parc naturel périurbain du Jorat 14 12 20 13
Parc naturel régional de la Vallée du Trient 213 44 85 178
Parc naturel régional Gruyère Pays-d’Enhaut 458 276 281 417
Parc régional Chasseral 145 94 122 138
Parco Val Calanca 21 3 15 17
Regionaler Naturpark Schaffhausen 39 20 25 34
Schweizerischer Nationalpark 72 11 9 35
UNESCO Biosphäre Entlebuch 63 36 52 53
Wildnispark Zürich Sihlwald 12 11 23 16
  #Stacked normalized horizontal bar graph
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/season_park.png") 

# Horizontal stacked bars normalised to 100 %: share of users per season in
# each park
ggplot(
  season_users,
  aes(x = PARK, y = season_users, fill = SEASON, group = SEASON)
) +
  geom_col(position = "fill", color = 'white', width = 0.8) +  # "fill" stacks to 100 %
  scale_y_continuous(labels = scales::percent) +               # axis labels as percentages
  scale_fill_manual(values = color_list) +
  guides(fill = guide_legend(reverse = TRUE)) +                # reverse the legend order
  theme(
    plot.title = element_text(hjust = 0.5, face = 'bold'),
    legend.position = 'bottom',
    axis.text.x = element_text(angle = 90, vjust = 0.15, hjust = 0.15, size = 9),
    axis.text.y = element_text(size = 9),
    strip.background = element_rect(fill = "#17252D", color = "#17252D"),
    strip.text = element_text(size = rel(1), face = "bold", color = "white", margin = margin(5, 0, 5, 0))
  ) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 50)) +
  labs(title = "Proportion of visitors per Season", x = "Parks", y = "Visitors") +
  ggplot2::coord_flip()  # swap the axes to get horizontal bars

rm(season, season_users)

3.3 VISITS PER DAY OF THE WEEK

Number of visitors per day in each Swiss National Parks

# Day of the week of each picture, as a factor ordered Monday..Sunday.
# The day is derived from the numeric weekday (0 = Sunday .. 6 = Saturday)
# instead of weekdays(), whose names depend on the session locale: on a
# non-English system the names would match none of the English factor levels
# and every DAY would silently become NA.
ddbb$DAY <- factor(
  c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")[
    as.POSIXlt(as.Date(ddbb$DATE))$wday + 1L
  ],
  levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
)

# Pictures per user in each park and day of the week
photoUser_day <- ddbb %>%
  group_by(PARK, DAY, USER) %>%
  summarise(photo_day = n())

# Number of users per park on each day of the week
daily <- photoUser_day %>%
  group_by(PARK, DAY) %>%
  summarise(visit_day = n())

# Wide table: one row per park, one column per day.
# (the name `sd` is kept from the original script; it shadows the name of
# stats::sd as a variable, function calls sd(...) still work)
sd <- pivot_wider(daily, names_from = DAY, values_from = visit_day)

daily_park <- as.data.frame(sd)
daily_park <- daily_park %>% arrange(PARK)

# Display the table with a nicer format
kbl(daily_park) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
PARK Monday Tuesday Wednesday Thursday Friday Saturday Sunday
Biosfera Val Müstair 35 37 35 31 33 61 43
Jurapark Aargau 55 40 50 42 52 66 86
Landschaftspark Binntal 23 21 17 17 24 25 33
Naturpark Beverin 54 50 49 66 63 102 109
Naturpark Diemtigtal 10 9 12 10 16 28 27
Naturpark Gantrisch 57 63 68 73 81 122 139
Naturpark Pfyn-Finges 65 60 47 55 77 90 109
Naturpark Thal 23 16 20 26 16 39 54
Parc du Doubs 41 38 47 42 59 100 110
Parc Ela 118 126 114 118 138 174 184
Parc Jura vaudois 88 88 89 84 126 200 222
Parc naturel périurbain du Jorat 10 6 6 2 10 8 18
Parc naturel régional de la Vallée du Trient 73 75 67 79 77 115 131
Parc naturel régional Gruyère Pays-d’Enhaut 182 160 183 187 218 390 376
Parc régional Chasseral 65 64 62 60 85 113 152
Parco Val Calanca 10 10 9 7 9 21 12
Regionaler Naturpark Schaffhausen 14 12 19 9 15 33 33
Schweizerischer Nationalpark 18 20 17 21 18 26 23
UNESCO Biosphäre Entlebuch 38 24 27 34 35 46 56
Wildnispark Zürich Sihlwald 6 2 11 4 3 14 23
#write.csv(daily_park,"~/GitHub/Flickr_SwissParks/Results/Day-Visits_Park.csv", row.names = FALSE)

Bar plot of visitors per day in the Parks

#Horizontal bar chart of daily number of visitors per Park
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park.png")   #Export the graph as *.png

# Grouped (dodged) bars: number of users per day of the week for each park
ggplot(
  daily,
  aes(x = PARK, y = visit_day, fill = DAY, group = DAY)
) +
  geom_col(position = position_dodge(), color = 'white', width = 0.8) +
  scale_fill_manual(
    values = c("#2980b9", "#5dade2", "#76d7c4", "#2ecc71", "#27ae60", "#f39c12", "#d35400")
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, face = 'bold'),
    legend.position = 'bottom',
    axis.text.x = element_text(angle = 90, vjust = 0.15, hjust = 0.15, size = 9),
    axis.text.y = element_text(size = 9),
    strip.background = element_rect(fill = "#17252D", color = "#17252D"),
    strip.text = element_text(size = rel(0.5), face = "bold", color = "white", margin = margin(5, 0, 5, 0))
  ) +
  scale_x_discrete(labels = function(x) str_wrap(x, width = 24)) +
  labs(
    x = 'Swiss National Parks',
    y = 'Number of visitors',
    title = 'Daily number of visitors'
  )

Mosaic plot of visitors per day in the Parks (Two test)

  #GRAPH OF MOSAIC PLOT OF PARKS AND DAYS
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic1.png")       

# Mosaic plot of park against day of the week, built from the
# one-row-per-(park, day, user) table and coloured by day
ggplot(data = photoUser_day) +
  geom_mosaic(
    aes(x = product(DAY, PARK), fill = DAY),
    offset = 0.02
  ) +
  scale_fill_manual(
    values = c("#27ae60", "#2ecc71", "#76d7c4", "#5dade2", "#2980b9", "#f39c12", "#d35400")
  ) +
  labs(y = "DAYS", title = " Mosaic plot of visits per Day in Parks ") +
  theme(
    legend.position = "none",
    axis.text.y = element_text(size = 10),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10)
  )
## Warning: `unite_()` was deprecated in tidyr 1.2.0.
## Please use `unite()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

#POINT GRAPH OF FREQUENCY OF VISIT IN PARKS PER DAY
#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/daily_park_Mosaic2.png")       

# Dot plot: number of users (y) per park (x), one coloured point per day of
# the week. The x axis shows the parks, so it is labelled "Parks" (the
# original labelled it "Days", which is what the colour legend encodes).
ggplot(daily, aes(x = PARK, y = visit_day, color = DAY)) +
  geom_point() +
  labs(x = "Parks", y = "Visits") +
  scale_color_manual(
    name = "Days:",
    values = c("#27ae60", "#2ecc71", "#76d7c4", "#5dade2", "#2980b9", "#f39c12", "#d35400")
  ) +
  theme(
    legend.title = element_text(size = 14, face = 2),
    legend.position = 'bottom',
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 8)
  ) +
  ggtitle('Daily total number of visitors in Parks')

4. USERS PROFILES

Analysis of users according the canton origin, visited park, and time span between the first and the last picture

4.1. ANALYSIS OF USERS BY CANTON

# Pictures per user in each canton (one row per canton/user)
canton <- ddbb %>%
  group_by(CANTON, USER) %>%
  summarise(canton_user = n())

# Wide table: one row per user, one column per canton
co <- pivot_wider(canton, names_from = CANTON, values_from = canton_user)
canton_users <- as.data.frame(co)
# a missing cell means the user took no picture in that canton
canton_users[is.na(canton_users)] <- 0
# turn the picture counts into presence flags (1 = at least one picture)
canton_users <- canton_users %>% mutate_if(is.numeric, ~1 * (. > 0))
# Number of cantons in which each user took pictures: sum over every canton
# column, i.e. everything except USER. The original summed the fixed range
# 2:13, which is only right when the data contain exactly 12 cantons.
canton_users$Total_Canton <- rowSums(
  canton_users[, names(canton_users) != "USER", drop = FALSE]
)
canton_users <- canton_users[order(canton_users$Total_Canton, decreasing = FALSE), ]

#write.csv(canton_users,"~/GitHub/Flickr_SwissParks/Results/Canton_Users.csv", row.names = FALSE)

# Histogram of the number of cantons per user; dashed blue line = mean
ggplot(canton_users, aes(x = Total_Canton)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "white") +
  geom_vline(aes(xintercept = mean(Total_Canton)), color = "blue", linetype = "dashed", size = 1) +
  labs(title = "Histogram of Cantons by users ", x = "Number of visited Cantons", y = "Count of users") +
  scale_x_continuous(breaks = seq(0, 12, 1)) +
  scale_y_continuous(breaks = seq(0, 6000, 200))

#Heatmap

# Five-step colour ramp from beige to green for the heatmap.
coul <- colorRampPalette(c("beige", "green"))(5)

# Use the first column (the user id) as row names, then drop it together with
# the per-user total so that only the 0/1 canton indicators remain.
rownames(canton_users) <- canton_users[, 1]
canton_users[, 1] <- NULL
# Dropped by name rather than by position, so the number of cantons may vary.
canton_users$Total_Canton <- NULL
data <- as.matrix(canton_users)

# No dendrograms / reordering; values are scaled per column (canton).
heatmap(data, Colv = NA, Rowv = NA, scale = "column", col = coul, main = "Heatmap of User per Canton")

# Remove temporaries of section 4.1.
rm(canton)
rm(co)
rm(coul)

4.2. ANALYSIS OF USERS BY PARK

# Number of photos per (park, user) pair. Grouping is dropped so that the
# reshaping below works on a plain table.
park <- ddbb %>%
  group_by(PARK, USER) %>%
  summarise(park_user = n(), .groups = "drop")

# One row per user, one column per park (first column is USER).
po <- pivot_wider(park, names_from = PARK, values_from = park_user)
park_users <- as.data.frame(po)

# Parks a user never photographed come out of the pivot as NA: treat as 0.
park_users[is.na(park_users)] <- 0

# Turn photo counts into a 0/1 presence indicator.
park_users <- park_users %>% mutate_if(is.numeric, ~1 * (. > 0))

# Number of distinct parks visited by each user. Every column except the
# first (USER) is a park, so no fixed column range is assumed.
park_users$Total_Park <- rowSums(park_users[, -1, drop = FALSE])
park_users <- park_users[order(park_users$Total_Park, decreasing = FALSE), ]

#write.csv(park_users,"~/GitHub/Flickr_SwissParks/Results/Park_Users.csv", row.names = FALSE)

# Distribution of the number of parks visited per user; the dashed blue
# vertical line marks the mean of Total_Park.
ggplot(data = park_users, mapping = aes(x = Total_Park)) +
  geom_histogram(fill = "white", colour = "black", binwidth = 1) +
  geom_vline(
    mapping = aes(xintercept = mean(Total_Park)),
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  scale_x_continuous(breaks = seq(from = 0, to = 20, by = 1)) +
  scale_y_continuous(breaks = seq(from = 0, to = 6000, by = 200)) +
  labs(
    title = "Histogram of Parks by users ",
    x = "Number of visited Parks",
    y = "Count of users"
  )

#Heatmap

# Ten-step colour ramp from beige to blue for the heatmap.
coul <- colorRampPalette(c("beige", "blue"))(10)

# Use the first column (the user id) as row names, then drop it together with
# the per-user total so that only the 0/1 park indicators remain.
rownames(park_users) <- park_users[, 1]
park_users[, 1] <- NULL
# Dropped by name rather than by position, so the number of parks may vary.
park_users$Total_Park <- NULL
park_users <- as.matrix(park_users)

# No dendrograms / reordering; values are scaled per column (park).
heatmap(park_users, Colv = NA, Rowv = NA, scale = "column", col = coul, main = "Heatmap of User per Park")

# Remove temporaries of section 4.2.
rm(park)
rm(po)
rm(coul)

4.3. ANALYSIS OF USERS' TIME SPENT

# Total number of photos per user.
user <- ddbb %>%
  group_by(USER) %>%
  summarise(photos = n())

user <- as.data.frame(user)

# Number of photos per user and day, with DATE coerced to class Date.
dt <- ddbb %>%
  group_by(USER, DATE) %>%
  summarise(photos = n()) %>%
  mutate(DATE = as.Date(DATE))

# Earliest photo date of each user.
dt1 <- dt %>%
  group_by(USER) %>%
  arrange(DATE) %>%
  slice(1L)

first <- as.data.frame(dt1)
names(first)[names(first) == "DATE"] <- "start"

# Latest photo date of each user.
dt2 <- dt %>%
  group_by(USER) %>%
  arrange(desc(DATE)) %>%
  slice(1L)

# Must be built from dt2 (one row per user, latest date). Using dt here would
# keep every date of every user and produce one merged row per user-day.
last <- as.data.frame(dt2)
names(last)[names(last) == "DATE"] <- "end"

# One row per user with both the first ("start") and last ("end") date.
dt_final <- merge(last, first, by = "USER")

dur <- dt_final %>%
  mutate(
    # Time span between first and last photo as a difftime.
    days = end - start,
    # NOTE(review): difftime() is called without `units`, so the unit is chosen
    # automatically from the data; the column name "seconds" and the division
    # by 365.25 do not obviously agree -- confirm the intended unit.
    seconds = as.numeric(difftime(end, start)) / 365.25,
    # Time span rounded to whole years.
    years = round(interval(start, end) / years(1))
  )

# Attach the spans to the per-user table and drop the per-day photo counts
# carried over from the first/last tables.
time_user <- merge(user, dur, by = "USER", all = TRUE)
time_user <- subset(time_user, select = -c(photos.x, photos.y))
time_user <- time_user[order(time_user$seconds, decreasing = TRUE), ]

# Distribution of the time span (in whole years) between each user's first
# and last picture; the dashed blue vertical line marks the mean of years.
ggplot(data = time_user, mapping = aes(x = years)) +
  geom_histogram(fill = "white", colour = "black", binwidth = 1) +
  geom_vline(
    mapping = aes(xintercept = mean(years)),
    linetype = "dashed",
    color = "blue",
    size = 1
  ) +
  scale_x_continuous(breaks = seq(from = 0, to = 20, by = 1)) +
  scale_y_continuous(breaks = seq(from = 0, to = 6000, by = 500)) +
  labs(
    title = "Histogram of time between first and last picture by users ",
    x = "time span in years",
    y = "Count of users"
  )

#write.csv(time_user,"~/GitHub/Flickr_SwissParks/Results/Time_Users.csv", row.names = FALSE)

# Remove the temporaries of section 4.3 in a single call.
rm(user, dt, dt1, dt2, last, dur)

5. SEMANTIC ANALYSIS

# Fixed seed so that the word-cloud layouts below are reproducible.
set.seed(42)

# Document table for tm::DataframeSource(), which requires columns named
# "doc_id" and "text"; "author" and "origin" are carried along as extra columns.
# doc_id holds the park name so that documents can be filtered by park later.
base <- data.frame(ddbb$photo_id, ddbb$USER, ddbb$PARK, ddbb$TAGS)
names(base)[names(base) == "ddbb.PARK"] <- "doc_id"
names(base)[names(base) == "ddbb.TAGS"] <- "text"
names(base)[names(base) == "ddbb.USER"] <- "author"
names(base)[names(base) == "ddbb.photo_id"] <- "origin"

# Remove numbers.
base$text <- gsub("[[:digit:]]", " ", base$text)
# Remove extra symbols. A character class is needed here: a parenthesised group
# would only match these four characters when they appear together in this order.
base$text <- gsub("[-¨“„]", " ", base$text)
# Remove punctuation, except for the period.
base$text <- gsub("(?!\\.)[[:punct:]]", " ", base$text, perl = TRUE)
# Collapse runs of whitespace and trim. Done last because every replacement
# above inserts spaces.
base$text <- gsub("\\s+", " ", str_trim(base$text))

# Build the corpus, drop English stopwords and stem the remaining words.
base_source <- DataframeSource(base)
base_corpus <- VCorpus(base_source)
base_corpus_clean <- tm_map(base_corpus, removeWords, stopwords("english"))
base_corpus_root <- tm_map(base_corpus_clean, stemDocument) # reduce words to their root

5.0 ANALYSIS: GENERAL

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Parks.png")       

# Word cloud over the stemmed corpus of all parks: at most 100 terms, each
# occurring at least 50 times, most frequent terms placed first.
wordcloud(
  base_corpus_root,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

#myStopwords <- setdiff(myStopwords, c("d", "e")) 
#text_corpus_clean <- tm_map(base_corpus_clean, removeWords, myStopwords)

5.1 ANALYSIS: Schweizerischer Nationalpark

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Nationalpark.png")       

# Corpus for the Schweizerischer Nationalpark: keep its documents, remove
# English stopwords and stem the remaining words. The variables are named after
# this park (they previously reused the Jurapark names of the next section).
sub_nationalpark <- base %>%
  filter(doc_id == "Schweizerischer Nationalpark") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
nationalpark <- wordcloud(
  sub_nationalpark,
  min.freq = 50,
  max.words = 100,
  random.order = FALSE,
  random.color = FALSE,
  colors = brewer.pal(8, "Dark2")
)

5.2 ANALYSIS: Jurapark Aargau

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jurapark.png")       

# Corpus for Jurapark Aargau: keep its documents, remove English stopwords
# and stem the remaining words.
sub_jurapark <- base %>%
  filter(doc_id == "Jurapark Aargau") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
jurapark <- wordcloud(
  sub_jurapark,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.3 ANALYSIS: Parc naturel régional Gruyère Pays-d’Enhaut

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gruyere.png")       

# Corpus for Parc naturel régional Gruyère Pays-d’Enhaut: keep its documents,
# remove English stopwords and stem the remaining words.
sub_gruyere <- base %>%
  filter(doc_id == "Parc naturel régional Gruyère Pays-d’Enhaut") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
gruyere <- wordcloud(
  sub_gruyere,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.4 ANALYSIS: Biosfera Val Müstair

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Mustair.png")       

# Corpus for Biosfera Val Müstair: keep its documents, remove English
# stopwords and stem the remaining words.
sub_mustair <- base %>%
  filter(doc_id == "Biosfera Val Müstair") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
mustair <- wordcloud(
  sub_mustair,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.5 ANALYSIS: Wildnispark Zürich Sihlwald

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Sihlwald.png")       

# Corpus for Wildnispark Zürich Sihlwald: keep its documents, remove English
# stopwords and stem the remaining words.
sub_sihlwald <- base %>%
  filter(doc_id == "Wildnispark Zürich Sihlwald") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
sihlwald <- wordcloud(
  sub_sihlwald,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.6 ANALYSIS: Parc régional Chasseral

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Chasseral.png")       

# Corpus for Parc régional Chasseral: keep its documents, remove English
# stopwords and stem the remaining words.
sub_chasseral <- base %>%
  filter(doc_id == "Parc régional Chasseral") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
chasseral <- wordcloud(
  sub_chasseral,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.7 ANALYSIS: Parc Jura vaudois

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Vaudois.png")       

# Corpus for Parc Jura vaudois: keep its documents, remove English stopwords
# and stem the remaining words.
sub_vaudois <- base %>%
  filter(doc_id == "Parc Jura vaudois") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
vaudois <- wordcloud(
  sub_vaudois,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.8 ANALYSIS: Parc Ela

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Ela.png")       

# Corpus for Parc Ela: keep its documents, remove English stopwords and stem
# the remaining words.
sub_ela <- base %>%
  filter(doc_id == "Parc Ela") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
ela <- wordcloud(
  sub_ela,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.9 ANALYSIS: Naturpark Gantrisch

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Gantrisch.png")       

# Corpus for Naturpark Gantrisch: keep its documents, remove English
# stopwords and stem the remaining words.
sub_gantrisch <- base %>%
  filter(doc_id == "Naturpark Gantrisch") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
gantrisch <- wordcloud(
  sub_gantrisch,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.10 ANALYSIS: Naturpark Beverin

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Beverin.png")       

# Corpus for Naturpark Beverin: keep its documents, remove English stopwords
# and stem the remaining words.
sub_beverin <- base %>%
  filter(doc_id == "Naturpark Beverin") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
beverin <- wordcloud(
  sub_beverin,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.11 ANALYSIS: Regionaler Naturpark Schaffhausen

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Schaffhausen.png")       

# Corpus for Regionaler Naturpark Schaffhausen: keep its documents, remove
# English stopwords and stem the remaining words.
sub_schaffhausen <- base %>%
  filter(doc_id == "Regionaler Naturpark Schaffhausen") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
schaffhausen <- wordcloud(
  sub_schaffhausen,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.12 ANALYSIS: Parc naturel régional de la Vallée du Trient

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Trient.png")       

# Corpus for Parc naturel régional de la Vallée du Trient: keep its documents,
# remove English stopwords and stem the remaining words.
sub_trient <- base %>%
  filter(doc_id == "Parc naturel régional de la Vallée du Trient") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
trient <- wordcloud(
  sub_trient,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.13 ANALYSIS: Parc du Doubs

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Doubs.png")       

# Corpus for Parc du Doubs: keep its documents, remove English stopwords and
# stem the remaining words.
sub_doubs <- base %>%
  filter(doc_id == "Parc du Doubs") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
doubs <- wordcloud(
  sub_doubs,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.14 ANALYSIS: Naturpark Thal

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Thal.png")       

# Corpus for Naturpark Thal: keep its documents, remove English stopwords and
# stem the remaining words.
sub_thal <- base %>%
  filter(doc_id == "Naturpark Thal") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
thal <- wordcloud(
  sub_thal,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.15 ANALYSIS: Naturpark Pfyn-Finges

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Pfyn-Finges.png")       

# Corpus for Naturpark Pfyn-Finges: keep its documents, remove English
# stopwords and stem the remaining words.
sub_pfyn_finges <- base %>%
  filter(doc_id == "Naturpark Pfyn-Finges") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
pfyn_finges <- wordcloud(
  sub_pfyn_finges,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.16 ANALYSIS: Naturpark Diemtigtal

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Diemigtal.png")       

# Corpus for Naturpark Diemtigtal: keep its documents, remove English
# stopwords and stem the remaining words.
sub_diemtigtal <- base %>%
  filter(doc_id == "Naturpark Diemtigtal") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
diemtigtal <- wordcloud(
  sub_diemtigtal,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.17 ANALYSIS: UNESCO Biosphäre Entlebuch

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Entlebuch.png")       

# Corpus for UNESCO Biosphäre Entlebuch: keep its documents, remove English
# stopwords and stem the remaining words.
sub_entlebuch <- base %>%
  filter(doc_id == "UNESCO Biosphäre Entlebuch") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
entlebuch <- wordcloud(
  sub_entlebuch,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.18 ANALYSIS: Parco Val Calanca

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Calanca.png")       

# Corpus for Parco Val Calanca: keep its documents, remove English stopwords
# and stem the remaining words.
sub_calanca <- base %>%
  filter(doc_id == "Parco Val Calanca") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
calanca <- wordcloud(
  sub_calanca,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.19 ANALYSIS: Landschaftspark Binntal

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Binntal.png")       

# Corpus for Landschaftspark Binntal: keep its documents, remove English
# stopwords and stem the remaining words.
sub_binntal <- base %>%
  filter(doc_id == "Landschaftspark Binntal") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
binntal <- wordcloud(
  sub_binntal,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)

5.20 ANALYSIS: Parc naturel périurbain du Jorat

#png("//files.geo.uzh.ch/shared/group/geocomp/jort_franziska_daniela/Results/Words_Jorat.png")       

# Corpus for Parc naturel périurbain du Jorat: keep its documents, remove
# English stopwords and stem the remaining words.
sub_jorat <- base %>%
  filter(doc_id == "Parc naturel périurbain du Jorat") %>%
  DataframeSource() %>%
  VCorpus() %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(stemDocument)

# Word cloud: at most 100 terms with a frequency of at least 50.
jorat <- wordcloud(
  sub_jorat,
  max.words = 100,
  min.freq = 50,
  colors = brewer.pal(8, "Dark2"),
  random.color = FALSE,
  random.order = FALSE
)